clear  // Clear all data and variables from memory

*****************************************************************************
* IDENTIFIES LISTED GROUPS
*****************************************************************************


*******************************************************************************************

* Import data from a CSV file using the specified delimiter
insheet using "${output_r}Orbis_ListedCompanies_France.csv", delim(";") case names

* Drop the variable v1
drop v1 

* Keep only observations where the NationalIDlabel is "SIREN number"
keep if NationalIDlabel == "SIREN number"

* Apply a string format to the Companyname variable for better display
format %30s Companyname

* Generate a duplicate indicator variable by BvDIDnumber to identify duplicates
bys BvDIDnumber: gen dup = cond(_N == 1, _N, 0)  // If there's only one observation, set dup to 1; else, set to 0

* Display frequency of duplicates
tab dup

* Drop the dup variable as it is no longer needed
drop dup

* Keep only relevant variables
keep NationalIDnumber IPOdate Delisteddate ListedDelistedUnlisted

* Rename variables for consistency and clarity
rename (NationalIDnumber IPOdate Delisteddate ListedDelistedUnlisted) ///
       (siren ipodate delistdate listed_status)

* Display frequency table for the listed status variable
tab listed

* Convert IPO date to Stata date format
gen date = date(ipodate, "DMY")  // Convert from "day-month-year" string to Stata date
format date %td
drop ipodate  // Drop the original date string variable
rename date ipodate  // Rename the formatted date variable back to ipodate

* Convert Delisting date to Stata date format
gen date = date(delistdate, "DMY")
format date %td
drop delistdate
rename date delistdate

* Display frequency tables for IPO and delisting dates
tab ipodate
tab delistdate

* Create year variables from the IPO and delisting dates
gen ipoyear = year(ipodate)
gen delistyear = year(delistdate)

* Save the dataset in Stata format
save "${output_r}listed_companies.dta", replace

*Group Level Analysis
* Merge with another dataset by the siren variable 
merge 1:m siren using "${input_stata}\links.dta"
drop if _merge == 1 

* Create a dummy variable indicating that the entity is listed and not a merger/acquisition entry
gen one_listed = (_merge == 3) * (entryMA == 0) 

* Collapse the data by group and year to get the sum of listed entities
collapse (sum) one_listed, by(sirtg year)  

* Create a binary variable indicating if there is at least one listed entity in the group
gen listed_gr = one_listed > 0 
drop one_listed

save "${output_stata}listed_group.dta", replace
